options(future.globals.maxSize= 891289600)
### Make sure all the packages are installed
if (!requireNamespace("Seurat", quietly = TRUE))
install.packages("Seurat")
if (!requireNamespace("dplyr", quietly = TRUE))
install.packages("dplyr")
if (!requireNamespace("devtools", quietly = TRUE))
install.packages("devtools")
devtools::install_github("jinworks/CellChat")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
if (!requireNamespace("AnnotationDbi", quietly = TRUE))
BiocManager::install("AnnotationDbi")
if (!requireNamespace("org.Hs.eg.db", quietly = TRUE))
BiocManager::install("org.Hs.eg.db")
if (!requireNamespace("ComplexHeatmap", quietly = TRUE))
BiocManager::install("ComplexHeatmap")
### Load all the necessary libraries
library(Seurat)
library(dplyr)
library(CellChat)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(ComplexHeatmap)
# if (!requireNamespace("cowplot", quietly = TRUE))
# install.packages("cowplot")
# library(cowplot)
# if (!requireNamespace("ggplot2", quietly = TRUE))
# install.packages("ggplot2")
# library(ggplot2)
# if (!requireNamespace("stringr", quietly = TRUE))
# install.packages("stringr")
# library(stringr)8 - Cell-Cell Communication analysis
Introduction
In this notebooks we are going to carry out a cell-cell communication analysis using CellChat. You can see the GitHub repository here, the CellChat vignette here and the differential communication analysis vignette here. The goal of the notebook is to identify which communication pathways are altered between flu and covid infection!
Some associated literature which is a must read are:
Key Takeaways
The Ligand-Receptor (L-R) database used gather the collective prior knowledge and has a great impact on the results obtained.
Different CCC tools have varying assumptions, therefore, the tool of choise will also have a major impact on the results.
CellChatandCellPhoneDBmake a point of modelling CCC events taking into account heteromeric complexes. This ensures all the subunits of a protein complex are expressed to consider a cell-cell interaction feasible. This assumption reduces false positive predictions.CellChat, additionally, accounts for interaction mediator proteins such as agonists.
Broadly, CCC tools are generally able to capture relevant biological signals. However, predicted interactions tend to have false positives, if available leveraging information from additional modalities and analyses could help to refine the predictions.
CCC inference from scRNAseq data makes the assumption that gene expression is a proxy for protein levels. Moreover, they don’t (and can’t) account for other intermediate steps between translation and protein function such as post-translational modifications, secretion, diffusion…
Library
Load data
We’re going to be working with a dataset from the paper - Immunophenotyping of COVID-19 and influenza highlights the role of type I interferons in development of severe COVID-19 Download data from cellxgene portal.
# Download the data in data/ directory
# download.file(
# url = "https://datasets.cellxgene.cziscience.com/d8e35450-de43-451a-9979-276eac688bce.rds",
# destfile = "../data/workshop-data.rds",
# method = "wget",
# extra = "-r -p --random-wait")
# We can also use the CLI with the wget command below
# wget https://datasets.cellxgene.cziscience.com/d8e35450-de43-451a-9979-276eac688bce.rds
se <- readRDS("../data/workshop-data.rds")Analysis
Convert ENSEMBL IDs to Gene Symbols
Right away we can see how ensembl ids are used in the rownames. Let’s transform them into their matched symbols to make them human-readable:
head(rownames(se))[1] "ENSG00000000003" "ENSG00000000005" "ENSG00000000419" "ENSG00000000457"
[5] "ENSG00000000460" "ENSG00000000938"
Convert to gene symbols
gene_df <- readr::read_csv(file = "../data/cov_flu_gene_names_table.csv")
symbol_id <- data.frame(index = rownames(se)) %>%
left_join(gene_df) %>%
pull(feature_name)
# re-create seurat object
mtx <- se@assays$RNA@data
rownames(mtx) <- symbol_id
se <- CreateSeuratObject(counts = mtx, meta.data = se@meta.data)# symbol_id <- mapIds(
# org.Hs.eg.db,
# keys = rownames(se),
# column = "SYMBOL",
# keytype = "ENSEMBL",
# multiVals = "first")
#
# # df <- data.frame(symbol = symbol_id, ensembl = names(symbol_id))
# all(rownames(se) == names(symbol_id))
#
# # re-create seurat object
# mtx <- se@assays$RNA@data
# rownames(mtx) <- symbol_id
#
# # Remove NAs
# sum(is.na(rownames(mtx)))
# dim(mtx)
# mtx <- mtx[!is.na(rownames(mtx)), ]
# dim(mtx)
#
# rownames(mtx) <- make.unique(rownames(mtx))
# se <- CreateSeuratObject(counts = mtx, meta.data = se@meta.data)
#
# sum(is.na(rownames(se)))Split dataset
# First we will remove the Uncategorized1/2 populations as we have seen that they are doublets
se <- se[, ! se$Celltype %in% c("Uncategorized1", "Uncategorized2")]
# Create samples so the CellChat knows which cells come from each sample
se$samples <- se$`Sample ID`
# Convert to character to drop inexisting factors
se$Celltype <- factor(as.character(se$Celltype))# Subset to cells coming form COVID-19 patients
covid <- se[, se$disease == "COVID-19"]
# Subset to cells coming form flu patients
flu <- se[, se$disease == "influenza"]Let’s visualize the cell type populations between these 2 datasets:
prop.table(table(covid$Celltype))
B cell, IgG- B cell, IgG+ CD4, EM-like
0.089433284 0.042724155 0.059219337
CD4, non-EM-like CD8, EM-like CD8, non-EM-like
0.049844847 0.061244488 0.099657031
classical Monocyte DC intermediate Monocyte
0.289955904 0.009407153 0.011170995
NK cell nonclassical Monocyte Platelet
0.130818226 0.026490283 0.107169688
RBC
0.022864609
prop.table(table(flu$Celltype))
B cell, IgG- B cell, IgG+ CD4, EM-like
0.031253033 0.012908861 0.037173639
CD4, non-EM-like CD8, EM-like CD8, non-EM-like
0.006405901 0.029117733 0.140929826
classical Monocyte DC intermediate Monocyte
0.514025041 0.003785305 0.015626517
NK cell nonclassical Monocyte Platelet
0.140638649 0.020479472 0.017664758
RBC
0.029991265
Carry out pre-processing steps on covid data and create the cellchat object
# Normalize data
covid <- NormalizeData(covid, verbose = FALSE)
cc_covid <- createCellChat(
object = covid,
group.by = "Celltype",
assay = "RNA",
do.sparse = TRUE)[1] "Create a CellChat object from a Seurat object"
The `meta.data` slot in the Seurat object is used as cell meta information
Set cell identities for the new CellChat object
The cell groups used for CellChat analysis are B cell, IgG-, B cell, IgG+, CD4, EM-like, CD4, non-EM-like, CD8, EM-like, CD8, non-EM-like, classical Monocyte, DC, intermediate Monocyte, NK cell, nonclassical Monocyte, Platelet, RBC
Same for the flu
# Normalize data
flu <- NormalizeData(flu, verbose = FALSE)
cc_flu <- createCellChat(
object = flu,
group.by = "Celltype",
assay = "RNA",
do.sparse = TRUE)[1] "Create a CellChat object from a Seurat object"
The `meta.data` slot in the Seurat object is used as cell meta information
Set cell identities for the new CellChat object
The cell groups used for CellChat analysis are B cell, IgG-, B cell, IgG+, CD4, EM-like, CD4, non-EM-like, CD8, EM-like, CD8, non-EM-like, classical Monocyte, DC, intermediate Monocyte, NK cell, nonclassical Monocyte, Platelet, RBC
CellChat CCC analysis
Cellchat intuition
How is the communication probability computed? See the methods section in the CellChat paper.
The communication probability Pi,j from cell groups i to j for a particular ligand-receptor pair k was modeled by
\[ P_{i,j}^k = \frac{{L_iR_j}}{{K_h + L_iR_j}} \times \left( {1 + \frac{{AG_i}}{{K_h + AG_i}}} \right) \cdot \left( {1 + \frac{{AG_j}}{{K_h + AG_j}}} \right) \\ \times \frac{{K_h}}{{K_h + AN_i}} \cdot \frac{{K_h}}{{K_h + AN_j}} \times \frac{{n_in_j}}{{n^2}}, \\ {L_i = \root {{m1}} \of {{L_{i,1} \cdots L_{i,m1}}},\,R_j = \root {{m2}} \of {{R_{j,1} \cdots R_{j,m2}}} \cdot \frac{{1 + RA_j}}{{1 + RI_j}}.} \]
Li and Rj represent the expression level of ligand L and receptor R in cell group i and cell group j, respectively
Kh whose default value was set to be 0.5
AG is the average expression of the L-R agonists
CellChat Database
Define the use of the human L-R database
CellChatDB <- CellChatDB.human
showDatabaseCategory(CellChatDB)glimpse(CellChatDB)List of 4
$ interaction:'data.frame': 3234 obs. of 28 variables:
..$ interaction_name : chr [1:3234] "TGFB1_TGFBR1_TGFBR2" "TGFB2_TGFBR1_TGFBR2" "TGFB3_TGFBR1_TGFBR2" "TGFB1_ACVR1B_TGFBR2" ...
..$ pathway_name : chr [1:3234] "TGFb" "TGFb" "TGFb" "TGFb" ...
..$ ligand : chr [1:3234] "TGFB1" "TGFB2" "TGFB3" "TGFB1" ...
..$ receptor : chr [1:3234] "TGFbR1_R2" "TGFbR1_R2" "TGFbR1_R2" "ACVR1B_TGFbR2" ...
..$ agonist : chr [1:3234] "TGFb agonist" "TGFb agonist" "TGFb agonist" "TGFb agonist" ...
..$ antagonist : chr [1:3234] "TGFb antagonist" "TGFb antagonist" "TGFb antagonist" "TGFb antagonist" ...
..$ co_A_receptor : chr [1:3234] "" "" "" "" ...
..$ co_I_receptor : chr [1:3234] "TGFb inhibition receptor" "TGFb inhibition receptor" "TGFb inhibition receptor" "TGFb inhibition receptor" ...
..$ evidence : chr [1:3234] "KEGG: hsa04350" "KEGG: hsa04350" "KEGG: hsa04350" "PMID: 27449815" ...
..$ annotation : chr [1:3234] "Secreted Signaling" "Secreted Signaling" "Secreted Signaling" "Secreted Signaling" ...
..$ interaction_name_2 : chr [1:3234] "TGFB1 - (TGFBR1+TGFBR2)" "TGFB2 - (TGFBR1+TGFBR2)" "TGFB3 - (TGFBR1+TGFBR2)" "TGFB1 - (ACVR1B+TGFBR2)" ...
..$ is_neurotransmitter : logi [1:3234] FALSE FALSE FALSE FALSE FALSE FALSE ...
..$ ligand.symbol : chr [1:3234] "TGFB1" "TGFB2" "TGFB3" "TGFB1" ...
..$ ligand.family : chr [1:3234] "TGF-beta" "TGF-beta" "TGF-beta" "TGF-beta" ...
..$ ligand.location : chr [1:3234] "Extracellular matrix, Secreted, Extracellular space" "Extracellular matrix, Secreted, Extracellular space" "Extracellular matrix, Secreted, Extracellular space" "Extracellular matrix, Secreted, Extracellular space" ...
..$ ligand.keyword : chr [1:3234] "Disease variant, Signal, Reference proteome, Extracellular matrix, Disulfide bond, Secreted, Mitogen, Growth fa"| __truncated__ "Disease variant, Chromosomal rearrangement, Signal, Reference proteome, Disulfide bond, Secreted, Extracellular"| __truncated__ "Disease variant, Signal, Reference proteome, Extracellular matrix, Disulfide bond, Secreted, Mitogen, Cardiomyo"| __truncated__ "Disease variant, Signal, Reference proteome, Extracellular matrix, Disulfide bond, Secreted, Mitogen, Growth fa"| __truncated__ ...
..$ ligand.secreted_type : chr [1:3234] "growth factor" "growth factor" "cytokine;growth factor" "growth factor" ...
..$ ligand.transmembrane : logi [1:3234] FALSE FALSE TRUE FALSE FALSE FALSE ...
..$ receptor.symbol : chr [1:3234] "TGFBR2, TGFBR1" "TGFBR2, TGFBR1" "TGFBR2, TGFBR1" "TGFBR2, ACVR1B" ...
..$ receptor.family : chr [1:3234] "Protein kinase superfamily, TKL Ser/Thr protein kinase" "Protein kinase superfamily, TKL Ser/Thr protein kinase" "Protein kinase superfamily, TKL Ser/Thr protein kinase" "Protein kinase superfamily, TKL Ser/Thr protein kinase" ...
..$ receptor.location : chr [1:3234] "Cell membrane, Secreted, Membrane raft, Cell surface, Cell junction, Tight junction" "Cell membrane, Secreted, Membrane raft, Cell surface, Cell junction, Tight junction" "Cell membrane, Secreted, Membrane raft, Cell surface, Cell junction, Tight junction" "Cell membrane, Secreted, Membrane raft" ...
..$ receptor.keyword : chr [1:3234] "Membrane, Secreted, Disulfide bond, Kinase, Transmembrane, Differentiation, ATP-binding, Receptor, Alternative "| __truncated__ "Membrane, Secreted, Disulfide bond, Kinase, Transmembrane, Differentiation, ATP-binding, Receptor, Alternative "| __truncated__ "Membrane, Secreted, Disulfide bond, Kinase, Transmembrane, Differentiation, ATP-binding, Receptor, Alternative "| __truncated__ "Membrane, Secreted, Disulfide bond, Kinase, Transmembrane, Differentiation, ATP-binding, Receptor, Alternative "| __truncated__ ...
..$ receptor.surfaceome_main: chr [1:3234] "Receptors" "Receptors" "Receptors" "Receptors" ...
..$ receptor.surfaceome_sub : chr [1:3234] "Act.TGFB;Kinase" "Act.TGFB;Kinase" "Act.TGFB;Kinase" "Act.TGFB;Kinase" ...
..$ receptor.adhesome : chr [1:3234] "" "" "" "" ...
..$ receptor.secreted_type : chr [1:3234] "" "" "" "" ...
..$ receptor.transmembrane : logi [1:3234] TRUE TRUE TRUE TRUE TRUE TRUE ...
..$ version : chr [1:3234] "CellChatDB v1" "CellChatDB v1" "CellChatDB v1" "CellChatDB v1" ...
$ complex :'data.frame': 338 obs. of 5 variables:
..$ subunit_1: chr [1:338] "INHBA" "INHA" "INHA" "IL12A" ...
..$ subunit_2: chr [1:338] "INHBB" "INHBA" "INHBB" "IL12B" ...
..$ subunit_3: chr [1:338] "" "" "" "" ...
..$ subunit_4: chr [1:338] "" "" "" "" ...
..$ subunit_5: chr [1:338] "" "" "" "" ...
$ cofactor :'data.frame': 32 obs. of 16 variables:
..$ cofactor1 : chr [1:32] "FST" "BAMBI" "TIE1" "PTPRB" ...
..$ cofactor2 : chr [1:32] "" "" "" "" ...
..$ cofactor3 : chr [1:32] "" "" "" "" ...
..$ cofactor4 : chr [1:32] "" "" "" "" ...
..$ cofactor5 : chr [1:32] "" "" "" "" ...
..$ cofactor6 : chr [1:32] "" "" "" "" ...
..$ cofactor7 : chr [1:32] "" "" "" "" ...
..$ cofactor8 : chr [1:32] "" "" "" "" ...
..$ cofactor9 : chr [1:32] "" "" "" "" ...
..$ cofactor10: chr [1:32] "" "" "" "" ...
..$ cofactor11: chr [1:32] "" "" "" "" ...
..$ cofactor12: chr [1:32] "" "" "" "" ...
..$ cofactor13: chr [1:32] "" "" "" "" ...
..$ cofactor14: chr [1:32] "" "" "" "" ...
..$ cofactor15: chr [1:32] "" "" "" "" ...
..$ cofactor16: chr [1:32] "" "" "" "" ...
$ geneInfo : tibble [26,827 × 8] (S3: tbl_df/tbl/data.frame)
..$ EntryID.uniprot: chr [1:26827] "P04217" "Q9NQ94" "P01023" "A8K2U0" ...
..$ Symbol : chr [1:26827] "A1BG" "A1CF" "A2M" "A2ML1" ...
..$ Synonym : chr [1:26827] NA "ACF ASP" "CPAMD5" "CPAMD9" ...
..$ Reviewed : chr [1:26827] "reviewed" "reviewed" "reviewed" "reviewed" ...
..$ EntryName : chr [1:26827] "A1BG_HUMAN" "A1CF_HUMAN" "A2MG_HUMAN" "A2ML1_HUMAN" ...
..$ ProteinName : chr [1:26827] "Alpha-1B-glycoprotein (Alpha-1-B glycoprotein)" "APOBEC1 complementation factor (APOBEC1-stimulating protein)" "Alpha-2-macroglobulin (Alpha-2-M) (C3 and PZP-like alpha-2-macroglobulin domain-containing protein 5)" "Alpha-2-macroglobulin-like protein 1 (C3 and PZP-like alpha-2-macroglobulin domain-containing protein 9)" ...
..$ Keywords : chr [1:26827] "Alternative splicing;Direct protein sequencing;Disulfide bond;Glycoprotein;Immunoglobulin domain;Reference prot"| __truncated__ "3D-structure;Alternative splicing;Cytoplasm;Endoplasmic reticulum;mRNA processing;Nucleus;Phosphoprotein;Refere"| __truncated__ "3D-structure;Bait region;Direct protein sequencing;Disulfide bond;Glycoprotein;Isopeptide bond;Protease inhibit"| __truncated__ "3D-structure;Alternative splicing;Bait region;Disulfide bond;Glycoprotein;Protease inhibitor;Reference proteome"| __truncated__ ...
..$ Location : chr [1:26827] "SUBCELLULAR LOCATION: Secreted." "SUBCELLULAR LOCATION: Nucleus {ECO:0000269|PubMed:12881431, ECO:0000269|PubMed:24916387}. Endoplasmic reticulum"| __truncated__ "SUBCELLULAR LOCATION: Secreted {ECO:0000269|PubMed:6203908}." "SUBCELLULAR LOCATION: Secreted {ECO:0000269|PubMed:16298998}." ...
CellChatDB$interaction %>%
dplyr::select(interaction_name, pathway_name, ligand, receptor, agonist, evidence, annotation) %>%
head() interaction_name pathway_name ligand receptor
TGFB1_TGFBR1_TGFBR2 TGFB1_TGFBR1_TGFBR2 TGFb TGFB1 TGFbR1_R2
TGFB2_TGFBR1_TGFBR2 TGFB2_TGFBR1_TGFBR2 TGFb TGFB2 TGFbR1_R2
TGFB3_TGFBR1_TGFBR2 TGFB3_TGFBR1_TGFBR2 TGFb TGFB3 TGFbR1_R2
TGFB1_ACVR1B_TGFBR2 TGFB1_ACVR1B_TGFBR2 TGFb TGFB1 ACVR1B_TGFbR2
TGFB1_ACVR1C_TGFBR2 TGFB1_ACVR1C_TGFBR2 TGFb TGFB1 ACVR1C_TGFbR2
TGFB2_ACVR1B_TGFBR2 TGFB2_ACVR1B_TGFBR2 TGFb TGFB2 ACVR1B_TGFbR2
agonist evidence annotation
TGFB1_TGFBR1_TGFBR2 TGFb agonist KEGG: hsa04350 Secreted Signaling
TGFB2_TGFBR1_TGFBR2 TGFb agonist KEGG: hsa04350 Secreted Signaling
TGFB3_TGFBR1_TGFBR2 TGFb agonist KEGG: hsa04350 Secreted Signaling
TGFB1_ACVR1B_TGFBR2 TGFb agonist PMID: 27449815 Secreted Signaling
TGFB1_ACVR1C_TGFBR2 TGFb agonist PMID: 27449815 Secreted Signaling
TGFB2_ACVR1B_TGFBR2 TGFb agonist PMID: 27449815 Secreted Signaling
CellChatDB$interaction %>%
dplyr::select(interaction_name, pathway_name, ligand, receptor, agonist, evidence, annotation) %>%
tail() interaction_name
Testosterone-Testosterone-HSD17B12_AR Testosterone-Testosterone-HSD17B12_AR
Testosterone-Testosterone-HSD17B3_AR Testosterone-Testosterone-HSD17B3_AR
Thyrostimulin-GPHB5_GPHA2_TSHR Thyrostimulin-GPHB5_GPHA2_TSHR
Thyrotropin-CGA_TSHB_TSHR Thyrotropin-CGA_TSHB_TSHR
Triiodothyronine-T3-DIO3_THRA Triiodothyronine-T3-DIO3_THRA
Triiodothyronine-T3-DIO3_THRB Triiodothyronine-T3-DIO3_THRB
pathway_name ligand
Testosterone-Testosterone-HSD17B12_AR Testosterone Testosterone-HSD17B12
Testosterone-Testosterone-HSD17B3_AR Testosterone Testosterone-HSD17B3
Thyrostimulin-GPHB5_GPHA2_TSHR TSH GPHB5_GPHA2
Thyrotropin-CGA_TSHB_TSHR TSH CGA_TSHB
Triiodothyronine-T3-DIO3_THRA Triiodothyronine T3-DIO3
Triiodothyronine-T3-DIO3_THRB Triiodothyronine T3-DIO3
receptor agonist evidence
Testosterone-Testosterone-HSD17B12_AR AR HMRbase;uniprot
Testosterone-Testosterone-HSD17B3_AR AR HMRbase;uniprot
Thyrostimulin-GPHB5_GPHA2_TSHR TSHR PMID: 12045258
Thyrotropin-CGA_TSHB_TSHR TSHR PMID: 12045259
Triiodothyronine-T3-DIO3_THRA THRA HMRbase;uniprot
Triiodothyronine-T3-DIO3_THRB THRB HMRbase;uniprot
annotation
Testosterone-Testosterone-HSD17B12_AR Non-protein Signaling
Testosterone-Testosterone-HSD17B3_AR Non-protein Signaling
Thyrostimulin-GPHB5_GPHA2_TSHR Non-protein Signaling
Thyrotropin-CGA_TSHB_TSHR Non-protein Signaling
Triiodothyronine-T3-DIO3_THRA Non-protein Signaling
Triiodothyronine-T3-DIO3_THRB Non-protein Signaling
Preprocess expression data
To infer the cell state-specific communications, CellChat identifies over-expressed ligands or receptors in one cell group and then identifies over-expressed ligand-receptor interactions if either ligand or receptor are over-expressed.
# subset the expression data of signaling genes for saving computation cost
cc_covid@DB <- CellChatDB
cc_covid <- subsetData(cc_covid) # This step is necessary even if using the whole database
future::plan("multisession", workers = 4) # do parallel
cc_covid <- identifyOverExpressedGenes(cc_covid)
cc_covid <- identifyOverExpressedInteractions(cc_covid)The number of highly variable ligand-receptor pairs used for signaling inference is 2205
# The number of highly variable ligand-receptor pairs used for signaling inference is 2212 Same for the flu
# subset the expression data of signaling genes for saving computation cost
cc_flu@DB <- CellChatDB
cc_flu <- subsetData(cc_flu) # This step is necessary even if using the whole database
future::plan("multisession", workers = 4) # do parallel
cc_flu <- identifyOverExpressedGenes(cc_flu)
cc_flu <- identifyOverExpressedInteractions(cc_flu)The number of highly variable ligand-receptor pairs used for signaling inference is 1720
# The number of highly variable ligand-receptor pairs used for signaling inference is 1728 Inference of cell-cell communication network
From the CellChat vignette: CellChat infers the biologically significant cell-cell communication by assigning each interaction with a probability value and peforming a permutation test. CellChat models the probability of cell-cell communication by integrating gene expression with prior known knowledge of the interactions between signaling ligands, receptors and their cofactors using the law of mass action.
The number of inferred ligand-receptor pairs clearly depends on the method for calculating the average gene expression per cell group. trimean approximates 25% truncated mean, implying that the average gene expression is zero if the percent of expressed cells in one group is less than 25%.
By setting this stringent parameters we ensure we are capturing robust signal and removing spurious cell-cell communication.
cc_covid <- computeCommunProb(cc_covid, type = "triMean")triMean is used for calculating the average gene expression per cell group.
[1] ">>> Run CellChat on sc/snRNA-seq data <<< [2024-03-01 15:37:27.42642]"
[1] ">>> CellChat inference is done. Parameter values are stored in `object@options$parameter` <<< [2024-03-01 15:40:00.585418]"
cc_flu <- computeCommunProb(cc_flu, type = "triMean")triMean is used for calculating the average gene expression per cell group.
[1] ">>> Run CellChat on sc/snRNA-seq data <<< [2024-03-01 15:40:02.813353]"
[1] ">>> CellChat inference is done. Parameter values are stored in `object@options$parameter` <<< [2024-03-01 15:41:30.726753]"
Summarize CCC to a signaling pathway level
We can summarize the individual ligand-receptor interactions into pathways for a broader look at the data.
cc_covid <- computeCommunProbPathway(cc_covid)
cc_flu <- computeCommunProbPathway(cc_flu)
## Look at all the pathways availavle
cc_covid@netP$pathways [1] "MHC-I" "MHC-II" "MIF" "CD99"
[5] "GALECTIN" "CypA" "CLEC" "ICAM"
[9] "ANNEXIN" "ADGRE" "CD45" "CCL"
[13] "APP" "Prostaglandin" "BAFF" "SELPLG"
[17] "LCK" "PECAM1" "CD23" "RESISTIN"
[21] "TGFb" "Cholesterol" "VISFATIN" "IL16"
[25] "SEMA4" "PARs" "GRN" "TNF"
[29] "CXCL" "GP1BA" "COLLAGEN" "CD40"
[33] "CysLTs" "CD160" "CD86" "LAIR1"
[37] "CD6" "PECAM2" "IFN-II" "ESAM"
[41] "ICOS" "BAG" "FLT3" "IL1"
[45] "TWEAK" "NECTIN" "MPZ"
Calculate the aggregated cell-cell communication network by counting the number of links or summarizing the communication probability.
cc_covid <- aggregateNet(cc_covid)
cc_flu <- aggregateNet(cc_flu)Lastly we can compute the network centrality scores of the cell types identifying key senders, receivers, mediators…
# the slot 'netP' means the inferred intercellular communication network of signaling pathways
cc_covid <- netAnalysis_computeCentrality(cc_covid, slot.name = "netP")
cc_flu <- netAnalysis_computeCentrality(cc_flu, slot.name = "netP") Visualize inferred IFN-II signaling network
123+123[1] 246
245+245[1] 490
With Heatmap
object.list <- list(covid = cc_covid, flu = cc_flu)
par(mfrow = c(1, 2))
plt_ls <- lapply(seq_len(length(object.list)), function(i) {
netVisual_heatmap(
object.list[[i]],
signaling = "IFN-II",
color.heatmap = "Reds",
title.name = glue::glue("IFN-II signaling network - {names(object.list)[i]}"))
})
draw(plt_ls[[1]] + plt_ls[[2]], ht_gap = unit(.5, "cm"))Merge Covid and Flue objects
Now we can merge the CellChat objects and work with them together. The rest of the analysis follows the comparison vignette.
cellchat <- mergeCellChat(object.list, add.names = names(object.list), cell.prefix = TRUE)The cell barcodes in merged 'meta' is AAACCCAAGAATGTTG-12 AAACCCAAGGCCTGCT-12 AAACCCAAGGGCAATC-1 AAACCCAAGTAAGGGA-18 AAACCCACAAGAGCTG-17 AAACCCACACAGTCGC-20
Save objects in case we need them for later use
saveRDS(object.list, file = "../data/cellchat_ls.rds")
# rm(object.list); gc()Differential interaction analysis
Let’s start by comparing the the total number of interactions
gg1 <- compareInteractions(
cellchat,
show.legend = FALSE,
group = c(1,2))
gg2 <- compareInteractions(
cellchat,
show.legend = FALSE,
group = c(1,2),
measure = "weight")
gg1 + gg2With this plot we can visualize how the covid dataset has a slightly larger number of interactions but the interaction strength within the flu dataset are stronger.
We can also take a look at how these interactions are different between covid and flu. In the colorbar, red (or blue) represents increased (or decreased) signaling in the second dataset compared to the first one. In our case red is stronger in Flu and blue in covid
gg1 <- netVisual_heatmap(cellchat)
gg2 <- netVisual_heatmap(cellchat, measure = "weight")
gg1 + gg2We can also assess how much the outgoing and incoming signal changes between conditions by looking at their absolute signals. Dot size is proportional to the number of inferred links (both outgoing and incoming) associated with each cell group.
# num.link <- sapply(object.list, function(x) {rowSums(x@net$count) + colSums(x@net$count)-diag(x@net$count)})
# weight.MinMax <- c(min(num.link), max(num.link)) # control the dot size in the different datasets
# gg <- list()
# for (i in 1:length(object.list)) {
# gg[[i]] <- netAnalysis_signalingRole_scatter(
# object.list[[i]],
# title = names(object.list)[i],
# weight.MinMax = weight.MinMax)
# }
# patchwork::wrap_plots(plots = gg)Prepare the data
slot.name <- "netP"
x.measure <- "outdeg"
y.measure <- "indeg"
signaling <- NULL
df_ls <- lapply(names(object.list), function(nm) {
object <- object.list[[nm]]
centr <- slot(object, slot.name)$centr
outgoing <- matrix(0, nrow = nlevels(object@idents), ncol = length(centr))
incoming <- matrix(0, nrow = nlevels(object@idents), ncol = length(centr))
dimnames(outgoing) <- list(levels(object@idents), names(centr))
dimnames(incoming) <- dimnames(outgoing)
for (i in 1:length(centr)) {
outgoing[, i] <- centr[[i]][[x.measure]]
incoming[, i] <- centr[[i]][[y.measure]]
}
outgoing.cells <- rowSums(outgoing)
incoming.cells <- rowSums(incoming)
num.link <- aggregateNet(object, signaling = signaling, return.object = FALSE,
remove.isolate = FALSE)$count
num.link <- rowSums(num.link) + colSums(num.link) - diag(num.link)
df <- data.frame(
outgoing.cells,
incoming.cells,
names(incoming.cells),
num.link,
nm)
colnames(df) <- c(paste0("outgoing_", nm), paste0("incoming_", nm), "labels", paste0("count_", nm))
df$labels <- factor(df$labels, levels = names(incoming.cells))
df
})Visualize how the interactions changed
require(gridExtra)
library(colorBlindness)
library(RColorBrewer)
# Define the number of colors you want
nb.cols <- length(unique(se$Celltype))
col_pal <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
names(col_pal) <- sort(unique(se$Celltype))
# Create a ggplot with 18 colors
df_ls[[1]] %>%
left_join(df_ls[[2]], by = "labels") %>%
ggplot() +
geom_point(aes(x = outgoing_flu, y = incoming_flu, color = labels,
fill = "Flu", size = count_flu), alpha = 0.5) +
geom_point(aes(x = outgoing_covid, y = incoming_covid, color = labels,
fill = "Covid", size = count_covid), size = 3, pch = 21) +
geom_segment(aes(
x = outgoing_covid,
y = incoming_covid,
xend = outgoing_flu,
yend = incoming_flu,
color = labels),
arrow = arrow(angle = 40, length = grid::unit(.35, "cm")),
show.legend = FALSE) +
theme_classic() +
guides(
size = FALSE,
shape = FALSE,
colour = guide_legend(override.aes = list(size = 5))) +
scale_color_manual(values = col_pal) +
labs(
title = "Cell-Cell interaction strength shifts from Covid to Flu",
# x = "Outgoing strength",
# y = "Incoming strength",
y = NULL, x = NULL,
color = "Cell Type",
fill = "Disease") +
scale_fill_manual(values = c("white", "black")) +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_line()) +
scale_y_continuous(labels = function(x) {
case_when(x == 25 ~ "25 Incoming\nStrength",
TRUE ~ as.character(round(x)))
}) +
scale_x_continuous(labels = function(x) {
case_when(x == 10 ~ "10\nOutgoing\nStrength",
TRUE ~ as.character(round(x)))
}) +
coord_fixed()object <- object.list[[1]]
slot.name <- "netP"
x.measure <- "outdeg"
y.measure <- "indeg"
signaling <- NULL
centr <- slot(object, "netP")$centr
outgoing <- matrix(0, nrow = nlevels(object@idents), ncol = length(centr))
incoming <- matrix(0, nrow = nlevels(object@idents), ncol = length(centr))
dimnames(outgoing) <- list(levels(object@idents), names(centr))
dimnames(incoming) <- dimnames(outgoing)
for (i in 1:length(centr)) {
outgoing[, i] <- centr[[i]][[x.measure]]
incoming[, i] <- centr[[i]][[y.measure]]
}
signaling <- signaling[signaling %in% object@netP$pathways]
outgoing <- outgoing[, signaling, drop = FALSE]
incoming <- incoming[, signaling, drop = FALSE]
outgoing.cells <- rowSums(outgoing)
incoming.cells <- rowSums(incoming)
num.link <- aggregateNet(object, signaling = signaling, return.object = FALSE,
remove.isolate = FALSE)$count
num.link <- rowSums(num.link) + colSums(num.link) - diag(num.link)
df <- data.frame(x = outgoing.cells, y = incoming.cells,
labels = names(incoming.cells), Count = num.link)
df$labels <- factor(df$labels, levels = names(incoming.cells))In this case we can see how CD8 T cells greatly increase their incoming signal as well as intermediate monocytes. NKs and CD4 EM-like, in turn, greatly increase their outgoing signal while not increasing their incoming signal.
CellChat allows us to explore signalling changes within one specific population between both conditions. Positive values highlight increases in the second group (Flu) while negative values are increases in the first group (covid).
netAnalysis_signalingChanges_scatter(cellchat, idents.use = "CD4, EM-like")Differential pathway signalling
Look at broad changes in pathway signalling across the entire dataset
gg1 <- rankNet(cellchat, mode = "comparison", measure = "weight", sources.use = NULL, targets.use = NULL, stacked = T, do.stat = TRUE)
gg2 <- rankNet(cellchat, mode = "comparison", measure = "weight", sources.use = NULL, targets.use = NULL, stacked = F, do.stat = TRUE)
gg1 + gg2TKTK not sure about this
We can also take a look at these changes by cell type:
library(ComplexHeatmap)
# combining all the identified signaling pathways from different datasets
pathway.union <- union(object.list[["covid"]]@netP$pathways, object.list[["flu"]]@netP$pathways)
ht1 <- netAnalysis_signalingRole_heatmap(
object.list[["covid"]],
pattern = "outgoing",
signaling = pathway.union,
title = "covid",
width = 5,
height = 12)
ht2 <- netAnalysis_signalingRole_heatmap(
object.list[["flu"]],
pattern = "outgoing",
signaling = pathway.union,
title = "flu",
width = 5,
height = 12)
draw(ht1 + ht2, ht_gap = unit(.5, "cm"))TKTK not sure about this
# combining all the identified signaling pathways from different datasets
pathway.union <- union(object.list[["covid"]]@netP$pathways, object.list[["flu"]]@netP$pathways)
ht1 <- netAnalysis_signalingRole_heatmap(
object.list[["covid"]],
pattern = "incoming",
signaling = pathway.union,
title = "covid",
width = 5,
height = 12)
ht2 <- netAnalysis_signalingRole_heatmap(
object.list[["flu"]],
pattern = "incoming",
signaling = pathway.union,
title = "flu",
width = 5,
height = 12)
draw(ht1 + ht2, ht_gap = unit(.5, "cm"))levels(cellchat@idents$joint) [1] "B cell, IgG-" "B cell, IgG+" "CD4, EM-like"
[4] "CD4, non-EM-like" "CD8, EM-like" "CD8, non-EM-like"
[7] "classical Monocyte" "DC" "intermediate Monocyte"
[10] "NK cell" "nonclassical Monocyte" "Platelet"
[13] "RBC"
netVisual_bubble(
cellchat,
sources.use = "NK cell",
targets.use = c("DC", "intermediate Monocyte"),
comparison = c(1, 2),
angle.x = 45)Session Info
sessionInfo()R version 4.3.1 (2023-06-16)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.4
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] grid stats4 stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] RColorBrewer_1.1-3 colorBlindness_0.1.9 gridExtra_2.3
[4] ComplexHeatmap_2.16.0 org.Hs.eg.db_3.17.0 AnnotationDbi_1.62.2
[7] IRanges_2.34.1 S4Vectors_0.38.1 CellChat_2.1.2
[10] Biobase_2.60.0 BiocGenerics_0.46.0 ggplot2_3.5.0
[13] igraph_2.0.2 dplyr_1.1.4 Seurat_5.0.1
[16] SeuratObject_5.0.1 sp_2.1-3
loaded via a namespace (and not attached):
[1] fs_1.6.3 matrixStats_1.2.0 spatstat.sparse_3.0-3
[4] bitops_1.0-7 devtools_2.4.5 httr_1.4.7
[7] doParallel_1.0.17 profvis_0.3.8 tools_4.3.1
[10] sctransform_0.4.1 backports_1.4.1 utf8_1.2.4
[13] R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16
[16] GetoptLong_1.0.5 urlchecker_1.0.1 withr_3.0.0
[19] prettyunits_1.1.1 progressr_0.14.0 cli_3.6.2
[22] Cairo_1.6-1 spatstat.explore_3.2-6 fastDummies_1.7.3
[25] network_1.18.2 labeling_0.4.3 sass_0.4.8
[28] spatstat.data_3.0-4 readr_2.1.4 ggridges_0.5.6
[31] pbapply_1.7-2 systemfonts_1.0.4 svglite_2.1.3
[34] parallelly_1.37.0 sessioninfo_1.2.2 rstudioapi_0.15.0
[37] RSQLite_2.3.1 FNN_1.1.4 gridGraphics_0.5-1
[40] generics_0.1.3 shape_1.4.6 vroom_1.6.3
[43] ica_1.0-3 spatstat.random_3.2-2 car_3.1-2
[46] Matrix_1.6-5 fansi_1.0.6 abind_1.4-5
[49] lifecycle_1.0.4 yaml_2.3.8 carData_3.0-5
[52] Rtsne_0.17 blob_1.2.4 promises_1.2.1
[55] crayon_1.5.2 miniUI_0.1.1.1 lattice_0.21-8
[58] cowplot_1.1.3 KEGGREST_1.40.0 magick_2.7.5
[61] sna_2.7-2 pillar_1.9.0 knitr_1.45
[64] rjson_0.2.21 future.apply_1.11.1 codetools_0.2-19
[67] leiden_0.4.3.1 glue_1.7.0 data.table_1.15.0
[70] remotes_2.4.2.1 vctrs_0.6.5 png_0.1-8
[73] spam_2.10-0 gtable_0.3.4 cachem_1.0.8
[76] xfun_0.42 mime_0.12 tidyverse_2.0.0
[79] coda_0.19-4.1 survival_3.5-7 iterators_1.0.14
[82] ellipsis_0.3.2 fitdistrplus_1.1-11 ROCR_1.0-11
[85] nlme_3.1-163 usethis_2.2.2 bit64_4.0.5
[88] RcppAnnoy_0.0.22 GenomeInfoDb_1.36.3 bslib_0.6.1
[91] irlba_2.3.5.1 KernSmooth_2.23-22 colorspace_2.1-0
[94] DBI_1.1.3 tidyselect_1.2.0 processx_3.8.2
[97] bit_4.0.5 compiler_4.3.1 curl_5.2.0
[100] BiocNeighbors_1.18.0 plotly_4.10.4 scales_1.3.0
[103] lmtest_0.9-40 callr_3.7.3 NMF_0.26
[106] stringr_1.5.1 digest_0.6.34 goftest_1.2-3
[109] spatstat.utils_3.0-4 presto_1.0.0 rmarkdown_2.25
[112] XVector_0.40.0 htmltools_0.5.7 pkgconfig_2.0.3
[115] fastmap_1.1.1 rlang_1.1.3 GlobalOptions_0.1.2
[118] htmlwidgets_1.6.4 shiny_1.8.0 farver_2.1.1
[121] jquerylib_0.1.4 zoo_1.8-12 jsonlite_1.8.8
[124] BiocParallel_1.34.2 statnet.common_4.9.0 RCurl_1.98-1.12
[127] magrittr_2.0.3 GenomeInfoDbData_1.2.10 ggnetwork_0.5.13
[130] dotCall64_1.1-1 patchwork_1.2.0 munsell_0.5.0
[133] Rcpp_1.0.12 reticulate_1.35.0.9000 stringi_1.8.3
[136] ggalluvial_0.12.5 zlibbioc_1.46.0 MASS_7.3-60
[139] plyr_1.8.9 pkgbuild_1.4.2 parallel_4.3.1
[142] listenv_0.9.1 ggrepel_0.9.5 deldir_2.0-2
[145] Biostrings_2.68.1 splines_4.3.1 tensor_1.5
[148] hms_1.1.3 circlize_0.4.15 ps_1.7.5
[151] ggpubr_0.6.0 spatstat.geom_3.2-8 ggsignif_0.6.4
[154] RcppHNSW_0.6.0 rngtools_1.5.2 reshape2_1.4.4
[157] pkgload_1.3.2.1 evaluate_0.23 BiocManager_1.30.22
[160] tzdb_0.4.0 foreach_1.5.2 httpuv_1.6.14
[163] RANN_2.6.1 tidyr_1.3.1 purrr_1.0.2
[166] polyclip_1.10-6 future_1.33.1 clue_0.3-64
[169] scattermore_1.2 gridBase_0.4-7 broom_1.0.5
[172] xtable_1.8-4 RSpectra_0.16-1 rstatix_0.7.2
[175] later_1.3.2 viridisLite_0.4.2 tibble_3.2.1
[178] memoise_2.0.1 registry_0.5-1 cluster_2.1.4
[181] globals_0.16.2